## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [conflicted] Will prefer dplyr::filter over any other package.
## [conflicted] Will prefer dplyr::select over any other package.
## [conflicted] Will prefer dplyr::slice over any other package.
## [conflicted] Will prefer dplyr::rename over any other package.
## [conflicted] Will prefer dplyr::intersect over any other package.
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
mytheme = theme_bw(base_size = 10) +
theme(text = element_text(size=10, colour='black'),
title = element_text(size=10, colour='black'),
line = element_line(size=0.5),
axis.title = element_text(size=10, colour='black'),
axis.text = element_text(size=10, colour='black'),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(size=0.5),
strip.background = element_blank(),
strip.text.y = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
legend.position = c(0.8, 0.8),
legend.text=element_text(size=10)) ## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Rows: 1560 Columns: 48
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (27): chr, strand, circ_id, circ_id_strand, tool, cell_line, FWD_primer,...
## dbl (21): start, end, FWD_pos, FWD_length, REV_pos, REV_length, FWD_TM, REV_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1137099 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (16): chr, strand, cell_line, tool, circ_id, circ_id_strand, count_group...
## dbl (7): start, end, BSJ_count, n_detected, n_db, estim_len_in, BSJ_count_m...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 29 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): tool, count_group
## dbl (16): nr_qPCR_total, nr_qPCR_fail, nr_qPCR_val, nr_RR_total, nr_RR_fail,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_circ$tool = factor(all_circ$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
cq$tool = factor(cq$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
val$tool = factor(val$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
cqsee panel 2 scripts
all_circ %>%
filter(!tool == "Sailfish-cir") %>%
ggplot(aes(tool, BSJ_count)) +
geom_boxplot() +
scale_y_log10(labels = scales::comma_format()) +
ylab("BSJ count") +
xlab('') +
mytheme_discrete_xnumbers
count_conc = tibble()
for (tool_1 in val %>% pull(tool) %>% unique()){
for (tool_2 in val %>% pull(tool) %>% unique()){
count_conc = count_conc %>%
bind_rows(all_circ %>% filter(tool == tool_1) %>%
select(cell_line, circ_id, BSJ_count, tool) %>% unique() %>%
rename(BSJ_count_1 = BSJ_count, tool_1 = tool) %>%
inner_join(all_circ %>% filter(tool == tool_2) %>%
select(cell_line, circ_id, BSJ_count, tool) %>% unique() %>%
rename(BSJ_count_2 = BSJ_count, tool_2 = tool)))
}
}## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Warning in inner_join(., all_circ %>% filter(tool == tool_2) %>% select(cell_line, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 4501 of `x` matches multiple rows in `y`.
## ℹ Row 4501 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Warning in inner_join(., all_circ %>% filter(tool == tool_2) %>% select(cell_line, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 170 of `x` matches multiple rows in `y`.
## ℹ Row 170 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
## Joining with `by = join_by(cell_line, circ_id)`
# only keep unique combo's (every combo is calculated twice) => remove this step!
count_conc = count_conc %>%
mutate(tool_comb = paste(tool_1, tool_2, sep = "/")) #,
# tool_comb = map_chr(str_split(tool_comb, "/"),
# ~str_c(str_sort(unique(.x)), collapse = "/"))) #%>%
# group_by(tool_comb, circ_id, cell_line) %>%
# arrange(tool_1) %>%
# filter(row_number()==1) %>%
# ungroup()
count_conc %>%
filter(!tool_1 == tool_2) %>%
filter(!tool_1 == "Sailfish-cir", !tool_2 == 'Sailfish-cir') %>%
#filter(tool_1 == "Sailfish-cir" | tool_2 == 'Sailfish-cir') %>%
ggplot(aes(BSJ_count_1, BSJ_count_2)) +
geom_smooth(method = "lm", color = '#00B9F2') +
stat_regline_equation(label.x = -Inf, label.y = Inf, vjust = 1.5, hjust = -0.1, size = 3) +
# stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "*`,`~")),
# label.y= Inf, label.x = -Inf, vjust = 2, hjust = -0.1, size = 3) +
stat_cor(aes(label = ..rr.label..),
label.y= Inf, label.x = -Inf, vjust = 3, hjust = -0.1, size = 3) +
stat_cor(aes(label = ..p.label..),
label.y= Inf, label.x = -Inf, vjust = 5, hjust = -0.1, size = 3) +
mytheme_discrete_x +
theme(aspect.ratio = 1) +
#coord_fixed(ratio = 1) +
xlim(0,4000) + ylim(0,4000) +
#xlim(0,130000) + ylim(0,130000) +
facet_wrap(~tool_comb, nrow = 4) +
xlab('') +
ylab('')## Warning: The dot-dot notation (`..rr.label..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(rr.label)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 5 rows containing missing values (`geom_smooth()`).
get same data in table
count_conc_lm = count_conc %>%
filter(!tool_1 == tool_2) %>%
filter(!tool_1 == "Sailfish-cir", !tool_2 == 'Sailfish-cir') %>%
group_by(tool_comb) %>%
do({
mod = lm(log10(BSJ_count_2) ~ log10(BSJ_count_1), data = .)
#mod = lm(BSJ_count_2 ~ BSJ_count_1, data = .)
data.frame(intercept = coef(mod)[1],
slope = coef(mod)[2],
R_squared = summary(mod)$r.squared)
})
count_conc_lmcount_conc_lm %>%
#mutate(delta_slope = abs(1-slope)) %>%
ggplot(aes(R_squared, slope)) +
geom_point() +
mytheme +
geom_hline(yintercept = 1, color = '#999999', linetype = 'dashed') +
geom_vline(xintercept = 1, color = '#999999', linetype = 'dashed') +
coord_fixed(ratio = 1)#ggsave('separate_figures/sup_figure_3.pdf', width = 10, height = 15, units = "cm")
# get median value
count_conc_lm %>% pull(slope) %>% quantile()## 0% 25% 50% 75% 100%
## 0.2020794 0.6929837 0.8648796 1.0159395 1.4786552
## 0% 25% 50% 75% 100%
## 0.1919673 0.6218548 0.7142990 0.7579095 1.0000000
## 0% 25% 50% 75% 100%
## 5.662137e-14 6.289033e-02 1.521316e-01 3.168455e-01 7.979206e-01
see panel 2 script
Heatmap based on counts here only for count ≥ 5 (in sup figure for all circ) calculate Jaccard index
jac = read_tsv("../data/Supplementary_Table_7_combo_2tools.txt") %>%
filter(cell_line == 'NCI-H23') %>%
mutate(jac_index = nr_intersection/nr_union,
jac_dist = 1 - jac_index) # calculate jac index and distance## Rows: 675 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (7): tool_1, tool_2, cell_line, count_group, tool_lt_1, tool_lt_2, tool_...
## dbl (9): nr_union, nr_intersection, perc_compound_val_1, perc_compound_val_2...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
count_table = jac %>%
select(tool_1, tool_2, jac_dist) %>%
pivot_wider(values_from = jac_dist, names_from = tool_2)
count_tablern = count_table$tool_1
count_table = count_table %>%
select(-tool_1)
count_matrix = as.matrix(count_table)
rownames(count_matrix) = rn
#pdf("../tmp_figures/sup_figure_5_NCI-H23.pdf", height=8, width=12)
heatmap.2(count_matrix,
main = "NCI-H23 - bin - Jaccard",
trace = "none", density.info = "none")all_circ_db = all_circ %>%
mutate(n_db_group = ifelse(n_db == 1, '1 database', '0 databases'),
n_db_group = ifelse(n_db > 1, '2-5 databases', n_db_group),
n_db_group = ifelse(n_db > 5, '6-9 databases', n_db_group),
n_db_group = ifelse(n_db > 9, '≥ 10 databases', n_db_group),
n_db_group = ifelse(n_db == 16, 'all databases', n_db_group),
n_db_group = ifelse(is.na(n_db), '0 databases', n_db_group))
all_circ_db$n_db_group = factor(all_circ_db$n_db_group, levels = c('all databases', '≥ 10 databases', '6-9 databases', "2-5 databases", '1 database', "0 databases"))
all_circ_db$tool = factor(all_circ_db$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
all_circ_db %>%
ggplot(aes(tool, fill = n_db_group)) +
geom_bar() +
scale_y_continuous(labels = scales::comma_format()) +
facet_wrap(~cell_line) +
ylab('number of circRNAs') +
xlab('') +
scale_fill_manual(name = "circRNAs reported by",
values = c("#0072B2", '#00B9F2', '#00A875', "#E69F00", "#CC79A7")) +
mytheme_discrete_xnumbers for paper
## [1] 0.665
all_circ_db %>%
group_by(tool) %>%
summarise(perc_db = sum(is.na(n_db)) / n()) %>% arrange(perc_db) %>%
pull(perc_db) %>% quantile()## 0% 25% 50% 75% 100%
## 0.01620845 0.03175164 0.05611643 0.20762892 0.87837177
all_circ_enst = all_circ %>%
mutate(ENST_group = ifelse(is.na(ENST), NA, 'one match'),
ENST_group = ifelse(ENST == 'ambiguous', 'ambiguous', ENST_group))
all_circ_enst$ENST_group = factor(all_circ_enst$ENST_group, levels = c('one match', 'ambiguous', NA))
all_circ_enst %>%
ggplot(aes(tool, fill = ENST_group)) +
geom_bar(position = 'fill') +
mytheme_discrete_x +
theme(legend.position = "right") +
ylab('% of circRNAs') +
xlab('') +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(name = "",
values = c('#F0E442', '#CC79A7', '#99999'))calculations
## [1] 52.3
## [1] 10.2
## [1] 37.5
all_circ %>%
mutate(estim_len_ex = end - start,
len_type = 'including all exons and introns') %>%
bind_rows(all_circ %>%
filter(!estim_len_ex == 'ambiguous',
!is.na(estim_len_ex)) %>%
mutate(estim_len_ex = as.numeric(estim_len_ex),
len_type = 'including all exons')) %>%
ggplot(aes(tool, estim_len_ex, color = len_type)) +
geom_boxplot(outlier.shape=NA, lwd = 1/.pt) +
mytheme_discrete_x +
scale_color_manual(values = c( '#CC79A7', '#E69F00')) +
theme(legend.title = element_blank(), legend.position = 'right') +
scale_y_log10(labels = scales::comma_format(), expand = expansion(mult = 0.5)) +
coord_cartesian(ylim = c(50, 110000)) +
ylab("estimated length of circRNAs") +
theme(axis.title.x=element_blank())numbers
all_circ %>%
mutate(estim_len_ex = end - start,
len_type = 'including all exons and introns') %>%
bind_rows(all_circ %>%
filter(!estim_len_ex == 'ambiguous',
!is.na(estim_len_ex)) %>%
mutate(estim_len_ex = as.numeric(estim_len_ex),
len_type = 'including all exons')) %>%
#filter(len_type == 'including all exons and introns') %>%
filter(len_type == 'including all exons') %>%
count(tool)all_circ_exons = all_circ %>%
filter(!nr_exons == 'ambiguous') %>%
mutate(nr_exons = as.numeric(nr_exons),
exon_group = ifelse(nr_exons == 1, 'single exon', NA),
exon_group = ifelse(nr_exons > 1, '2-5 exons', exon_group),
exon_group = ifelse(nr_exons > 5, '6-9 exons', exon_group),
exon_group = ifelse(nr_exons > 9, '≥ 10 exons', exon_group),
nr_exons = as.character(nr_exons)) %>%
bind_rows(all_circ %>%
filter(nr_exons == 'ambiguous') %>%
mutate(exon_group = ifelse(nr_exons == "ambiguous", 'ambiguous', exon_group))) %>%
bind_rows(all_circ %>%
filter(is.na(nr_exons)) %>%
mutate(exon_group = NA))
all_circ_exons %>% filter(exon_group == "ambiguous")all_circ_exons$exon_group = factor(all_circ_exons$exon_group,
levels = c('≥ 10 exons', '6-9 exons', '2-5 exons', 'single exon', 'ambiguous', NA))
all_circ_exons$tool = factor(all_circ_exons$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
all_circ_exons %>%
#filter(!exon_group == 'ambiguous', !is.na(exon_group)) %>%
ggplot(aes(tool, fill = exon_group)) +
geom_bar(position = 'fill') +
mytheme_discrete_x +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_manual(name = "circRNAs with ...",
values = c("#0072B2", '#00B9F2', '#00A875', "#E69F00", '#CC79A7', '#99999')) +
theme(legend.position = NULL) +
xlab('') +
ylab('')first calculate strand distribution linear genes
ens = read_tsv('../../../references/Homo_sapiens.GRCh38.103.genes.gtf') %>%
select(gene_id, strand) %>%
rename(circ_id = gene_id) %>%
mutate(tool = 'lineair genes',
tool_group = 'lin')## Rows: 60666 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (8): chr, database, type, dot, strand, dot2, gene_id, gene_name
## dbl (3): start, end, gene_version
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
all_circ_strand = all_circ %>%
mutate(tool_group = 'circ') %>%
bind_rows(ens)
all_circ_strand$tool = factor(all_circ_strand$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl", 'lineair genes'))
all_circ_strand %>%
ggplot(aes(tool, fill = strand)) +
geom_bar(position = 'fill') +
ylab('% of circRNAs') +
scale_y_continuous(labels = scales::percent_format()) +
xlab('') +
facet_grid(~tool_group, scales = 'free_x', space = 'free') +
scale_fill_manual(values = c('#009E73', '#CC79A7' , '#5AB4E5')) +
mytheme_discrete_x +
theme(legend.position = 'right')